#! /home/chunwei/chunenv/bin/python
# -*- coding: utf-8 -*-
from __future__ import division
import sys, argparse
import csv
import time as t
import datetime as d
import pandas as pd
import numpy as np
from collections import Counter
'''
Count the number of courses each user takes.
'''
def parse_args():
    """Parse the command-line arguments of this script.

    Two positional arguments are expected, in this order:
    ``enrollment_path`` and ``output``.

    When the script is started without any argument, the full help text is
    printed and the process exits with status 0 (instead of argparse's
    terse "missing arguments" error).

    Returns:
        dict: the parsed arguments keyed by argument name
        (``'enrollment_path'`` and ``'output'``).

    Raises:
        SystemExit: raised by argparse after printing help, or when the
            arguments cannot be parsed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('enrollment_path')
    parser.add_argument('output')

    # Print the help directly rather than appending '-h' to sys.argv, so
    # the process-wide argument list is never mutated as a side effect.
    if len(sys.argv) == 1:
        parser.print_help()
        parser.exit()

    return vars(parser.parse_args())

# Read both paths from the parsed command line.
args = parse_args()
enrollment_path, output = args['enrollment_path'], args['output']

# Maps each username to the set of distinct course ids seen for that user.
user_course = {}

data = pd.read_csv(enrollment_path)

# Every row is unpacked positionally into three values: the first is not
# used, the second is the grouping key and the third is collected in a set
# so that repeated (user, course) pairs are counted once.
for _enrollment_id, user, course in data.values:
    user_course.setdefault(user, set()).add(course)

# Build two parallel arrays: the users and their distinct-course counts.
# keys() and values() iterate in the same order on an unmodified dict.
usernames = np.array(list(user_course.keys()))
num_course = np.array([len(courses) for courses in user_course.values()])
assert(len(usernames) == len(num_course))

# Assemble the result table with a fixed column order and write it out
# without the index column.
out_data = pd.DataFrame(
    {'username': usernames, 'num_course': num_course},
    columns=['username', 'num_course'],
)
out_data.to_csv(output, index=False)
